********************************************************************************
*   
********************************************************************************
clear programs
clear all
set more off

capture program drop main
program define main
	disp "Calling subprograms"
	SummariseHouseholdTypes
	SummaryStatistics
	TimeTrends
	ProbitReEnroll
	ChallengeGaps
	Weather
end

*===============================================================================
* 1)  SummariseHouseholdTypes
capture program drop SummariseHouseholdTypes
program define SummariseHouseholdTypes
*===============================================================================

cd "$GeneralModified"
use "BalancedData",replace

keep id tps date kwhw lnkwhw buildingtype heating sitecode balanced*

*-------------------------------------------------------------------------------
* Table 1
*-------------------------------------------------------------------------------
*Keep 2006 data
keep if date<=563

*2006 average electricity use for Table 1
tabstat kwhw,s(mean sd) by(tps)

collapse kwhw,by(id buildingtype heating tps)

*Number of observations
tab tps 

*-------------------------------------------------------------------------------
* Table 1 frequency by type
*-------------------------------------------------------------------------------

*Switch order of tps by defining gr=2 for non-participants
gen grp=tps
replace grp=2 if tps==0

gen ht_bt = 1 if heating==1 & buildingtype==1
replace ht_bt = 2 if heating==1 & buildingtype==2
replace ht_bt = 3 if heating==1 & buildingtype==3
replace ht_bt = 4 if heating==1 & buildingtype==4
replace ht_bt = 5 if heating==1 & buildingtype==5
replace ht_bt = 6 if heating==1 & buildingtype>5

replace ht_bt = 7 if heating==0 & buildingtype==1
replace ht_bt = 8 if heating==0 & buildingtype==2
replace ht_bt = 9 if heating==0 & buildingtype==3
replace ht_bt = 10 if heating==0 & buildingtype==4
replace ht_bt = 11 if heating==0 & buildingtype==5
replace ht_bt = 12 if heating==0 & buildingtype>5

replace ht_bt = 13 if heating==2

*Percent building types for Table 1
tabulate ht_bt grp,col
tabulate ht_bt grp,col nofreq
//shows that totals still add up to the total observations from the paper

cd "$SaveOutputFiles"
tabout ht_bt grp using test.text,c(col) replace style(text)
tabout ht_bt grp using test.tex,c(col) replace style(tex) body

*-------------------------------------------------------------------------------
* Mean differences by heating-building type
*-------------------------------------------------------------------------------
eststo c7_1: reg kwhw tps if ht_bt==1
eststo c7_2: reg kwhw tps if ht_bt==2
eststo c7_3: reg kwhw tps if ht_bt==3
eststo c7_4: reg kwhw tps if ht_bt==4
eststo c7_5: reg kwhw tps if ht_bt==5
eststo c7_6: reg kwhw tps if ht_bt==6
eststo c7_7: reg kwhw tps if ht_bt==7
eststo c7_8: reg kwhw tps if ht_bt==8
eststo c7_9: reg kwhw tps if ht_bt==9
eststo c7_10: reg kwhw tps if ht_bt==10
eststo c7_11: reg kwhw tps if ht_bt==11
eststo c7_12: reg kwhw tps if ht_bt==12
eststo c7_13: reg kwhw tps if ht_bt==13

*Table of mean differences and t-stats in kwhw use by heating-tps groups
esttab c7_*, compress nogaps keep(tps) mti sfmt(%7.3f)

*http://repec.org/bocode/e/estout/advanced.html#advanced907
*export coefficients
esttab c7_*, compress nostar nogaps keep(tps) mti sfmt(%7.3f) noobs not 
mat list r(coefs)
mat A= r(coefs)'
matrix list A
matrix rownames A = names
matrix roweq A = names
esttab matrix(A,fmt(%7.1f)), plain nolines 
cd "$SaveOutputFiles"
esttab matrix(A,fmt(%7.1f)) using temp.tex,replace  plain nolines

*Export t-statistics
esttab c7_*, compress nogaps nostar mti sfmt(%7.3f) noobs cells(t) nocon
mat list r(coefs)
mat A= r(coefs)'
matrix list A
matrix rownames A = names
matrix roweq A = names
esttab matrix(A,fmt(%7.1f)), plain nolines 
cd "$SaveOutputFiles"
esttab matrix(A,fmt(%7.1f)) using temp.tex,replace  plain nolines

*Verification check
reg kwhw tps if heating==0 & buildingtype==1
//25.78, t-stat2 2.8 — as expected

*Show the percent difference between participant and non-participant, by group
*collapse to average use by participant and ht_bt
preserve
collapse kwhw,by(ht_bt tps)
reshape wide kwhw,i(ht_bt) j(tps)
gen diff=kwhw1-kwhw0
gen diff_pc=(kwhw1-kwhw0)/kwhw0
restore

*-------------------------------------------------------------------------------
*Appendix Table A.1 comparison with and without controlling for building types. 
*Suppress the building type FE's from the output.
*-------------------------------------------------------------------------------
eststo c1_1: reg kwhw tps
eststo c1_2: reg kwhw tps i.buildingtype i.heating
eststo c1_3: reg kwhw tps i.buildingtype#i.heating
esttab c1_1 c1_2 c1_3, compress nogaps keep(tps) mti l sfmt(%7.3f) wide

*-------------------------------------------------------------------------------
* Appendix Table A.2 Household characteristics (excluding All Residential)
*-------------------------------------------------------------------------------
tabulate buildingtype grp,col nofreq
tabulate heating grp,col nofreq

*Look at differences between tps=0 and tps=1 within groups
eststo c3_bt1: reg kwhw tps if buildingtype==1
eststo c3_bt2: reg kwhw tps if buildingtype==2
eststo c3_bt3: reg kwhw tps if buildingtype==3
eststo c3_bt4: reg kwhw tps if buildingtype==4
eststo c3_bt5: reg kwhw tps if buildingtype==5
eststo c3_bt999: reg kwhw tps if buildingtype==999
esttab c3_*, compress nogaps nostar keep(tps) mti sfmt(%7.3f)

eststo c2_ht0: reg kwhw tps if heating==0
eststo c2_ht1: reg kwhw tps if heating==1
eststo c2_ht2: reg kwhw tps if heating==2

*esttab c2_*, compress nogaps mti sfmt(%7.3f)
*esttab c3_*, compress nogaps mti sfmt(%7.3f)

*Export the coefficients for use in the Appendix table of differences in means
esttab c3_*, compress nostar nogaps keep(tps) mti sfmt(%7.3f) noobs not 
mat list r(coefs)
mat A= r(coefs)'
matrix list A
matrix rownames A = names
matrix roweq A = names
esttab matrix(A,fmt(%7.1f)), plain nolines 
cd "$SaveOutputFiles"
esttab matrix(A,fmt(%7.1f)) using example.tex,replace  plain nolines

*Export the t-stats for use in the Appendix table of differences in means
esttab c3_*, compress nogaps nostar mti sfmt(%7.3f) noobs cells(t) nocon keep(tps)
mat list r(coefs)
mat A= r(coefs)'
matrix list A
matrix rownames A = names
matrix roweq A = names
esttab matrix(A,fmt(%7.1f)), plain nolines 
cd "$SaveOutputFiles"
esttab matrix(A,fmt(%7.1f)) using example.tex,replace  plain nolines

*average use by heating type for % change calculation
tabstat kwhw if tps==0,by(buildingtype)
tabstat kwhw if tps==0,by(heating)

tabstat kwhw if tps==0
*14.92 kWh average increase for participants compared to non participants, once building type X heating is controled for, is 1.5% of 971.77kWh average use for non-participants.

*Extra comparisons for the Simpson's paradox. 
*-------------------------------------------------------------------------------
eststo c4_ht0_bt1: reg kwhw tps if heating==0 & buildingtype==1
eststo c4_ht0_bt2: reg kwhw tps if heating==0 & buildingtype==2
eststo c4_ht0_bt3: reg kwhw tps if heating==0 & buildingtype==3
eststo c4_ht0_bt4: reg kwhw tps if heating==0 & buildingtype==4
eststo c4_ht0_bt5: reg kwhw tps if heating==0 & buildingtype==5
eststo c4_ht0_bt999: reg kwhw tps if heating==0 & buildingtype==999
esttab c2_ht0 c4_*, compress nogaps nostar keep(tps) mti sfmt(%7.3f)

eststo c5_ht1_bt1: reg kwhw tps if heating==1 & buildingtype==1
eststo c5_ht1_bt2: reg kwhw tps if heating==1 & buildingtype==2
eststo c5_ht1_bt3: reg kwhw tps if heating==1 & buildingtype==3
eststo c5_ht1_bt4: reg kwhw tps if heating==1 & buildingtype==4
eststo c5_ht1_bt5: reg kwhw tps if heating==1 & buildingtype==5
eststo c5_ht1_bt999: reg kwhw tps if heating==1 & buildingtype==999
esttab c2_ht1 c5_*, compress nogaps nostar keep(tps) mti sfmt(%7.3f)

*Average differences by heating group
esttab c2_*, compress nogaps mti sfmt(%7.3f)
//tend to be negative within heating categories
//large difference for heating==1 of -179
//TPS being lower for participants than non-participants can occur because partipants are more likely to be apartments than SFDs
//heating is higher use than non-elec heating

tabulate buildingtype tps if heating==0,col nofreq
tabulate buildingtype tps if heating==1,col nofreq

esttab c3_*, compress nogaps mti sfmt(%7.3f)
//tend to be negative within building types. So, participants appear lower kWh than non-participants. 
//apartments have lower use than SFDs

tabulate heating tps,col nofreq
//on average, participants are more likely to be non-electic heating
tabulate heating tps if  buildingtype==1,col nofreq
tabulate heating tps if  buildingtype==2,col nofreq
tabulate heating tps if  buildingtype==3,col nofreq
tabulate heating tps if  buildingtype==4,col nofreq
tabulate heating tps if  buildingtype==5,col nofreq
//participants more likely to be non-electric heating within each group.
//expect lower average use for participants

//why is the average use lower for heating==1, but higher within each group for tps==1?
*Simpsons paradox!
tabstat kwhw if heating==1 & buildingtype==1,by(tps)

frame change default

*-------------------------------------------------------------------------------
*Household characteristics for Table 1 
*-------------------------------------------------------------------------------
*Keep only unique household info and merge in building characteristics
keep id tps buildingtype heating

cd "$datafrom"
merge 1:1 id using unique_buildingchars_class.dta,keep(3) nogen

*Summary statistics like building types
cd "$SaveOutputFiles"
*Average property value, floor area, and bedrooms
eststo m1: estpost tabstat value_tot, by(tps) statistics(mean sd N) columns(statistics)
eststo m2: estpost tabstat floorarea, by(tps) statistics(mean sd N) columns(statistics)
eststo m3: estpost tabstat bedrooms, by(tps) statistics(mean sd N) columns(statistics)

esttab m1 m2 m3,cells("mean sd") mti

*-------------------------------------------------------------------------------
* Summary statistics for all lower mainland residential households - FOR APPENDIX A.2
*-------------------------------------------------------------------------------
*For all BCA data I need buildingtype heating sitecode value_tot floorarea

cd "$datafrom"
use BCAssessmentBuildingTypes,clear
cd "$SaveOutputFiles"

gen bedrooms2=bedrooms
replace bedrooms2=5 if bedrooms>=5
gen tps=0

eststo m1: estpost tabstat value_tot floorarea if resid==1, listwise statistics(mean sd) columns(statistics)
esttab m1, cells("mean sd") nomtitle nonumber b(0) replace

end

*===============================================================================
* 2) Table 2: Probability of Challenge Outcomes
capture program drop SummaryStatistics
program define SummaryStatistics
*===============================================================================

cd "$GeneralModified"
use "BalancedData",replace

*Merge in total_kwh_num and similar totals, for evaluating if a household passed a challenge
rename reduc challenge_order
merge m:1 id challenge_order using Accounts_All,keepusing(total_kwh_num adjusted_historical_kwh_num adjusted_historical_kwh_num success2) keep(1 3) nogen
rename challenge_order reduc

*-------------------------------------------------------------------------------
*Table 2 of Challenges for paper
bysort id reduc: gen uniid_reduc=1 if _n==1 & reduc!=0

*Generate indicator for success
gen BCHchange=(total_kwh_num-adjusted_historical_kwh_num)/adjusted_historical_kwh_num if reduc!=0
gen BCHevalSuccess=1 if BCHchange<-0.095
replace BCHevalSuccess=0 if BCHevalSuccess==.

*Find mean Continue or not variable to include in the table of challenge outcomes
gen Continue=1 if num_challenges>reduc & reduc!=0
replace Continue=0 if Continue==.

*Look at the probability of continuing if successful?
gen Continue_Success1=1 if Continue==1 & BCHevalSuccess==1
replace Continue_Success1=0 if Continue==0 & BCHevalSuccess==1
gen Continue_Success0=1 if Continue==1 & BCHevalSuccess==0
replace Continue_Success0=0 if Continue==0 & BCHevalSuccess==0

*Look at probability of passing if failed/passed previous challenge
capture drop drop Lag_BCHevalSuccess*
bysort uniid_reduc id (reduc): gen Lag_BCHevalSuccess=BCHevalSuccess[_n-1] if uniid_reduc==1

gen BCHevalSuccess_Ls0=BCHevalSuccess if Lag_BCHevalSuccess==0
gen BCHevalSuccess_Ls1=BCHevalSuccess if Lag_BCHevalSuccess==1

cd "$SaveOutputFiles"
tabout reduc if uniid_reduc==1 & balancedset_all==1 & begin_challenge_1<=monthly("2014m8","YM") using SummaryStatsT_1.tex,replace c(mean BCHevalSuccess mean BCHevalSuccess_Ls0 mean BCHevalSuccess_Ls1 mean Continue mean Continue_Success0 mean Continue_Success1) sum format(2c) ptotal(none) npos(both) body

end


*===============================================================================
* 3) Figure A.2 — time trends
capture program drop TimeTrends
program define TimeTrends
*===============================================================================

cd "$GeneralModified"
use "BalancedData",replace

*-------------------------------------------------------------------------------
* Compare time trends for the different sets
*-------------------------------------------------------------------------------	
capture drop tps01ave_use
capture drop uni_date_tps01
bysort date tps: egen tps01ave_use=mean(kwhw)
bysort date tps: gen uni_date_tps01=1 if _n==1

bysort date tps balancedset_change: egen tps01ave_use_change=mean(kwhw) if balancedset_change==1
bysort date tps balancedset_change: gen uni_date_tps01_change=1 if _n==1 & balancedset_change==1

twoway line tps01ave_use date if uni_date_tps01==1 & tps==0 ///
	|| line tps01ave_use date if uni_date_tps01==1 & tps==1, ///
	|| line tps01ave_use_change date if uni_date_tps01_change==1 & tps==0 ///
	|| line tps01ave_use_change date if uni_date_tps01_change==1 & tps==1, ///
	legend(label(1 "All Non-participants") label(2 "All participants") label(3 "BS Change-Non-participants") label(4 "BS Participants")) ///
	xtitle(" ") ytitle(Monthly kWh) graphregion(color(white))
*REGRESSION_DID_SUMMARYSTATS T.5
graph rename TimeTrend_Compare3,replace

end


*===============================================================================
* Table D.1: Probit model for re-enrolling
capture program drop ProbitReEnroll
program define ProbitReEnroll
*===============================================================================

cd "$GeneralModified"
use "BalancedData",replace

*Merge in info needed like total_kwh_num
rename reduc challenge_order
merge m:1 id challenge_order using Accounts_All,keepusing(total_kwh_num adjusted_historical_kwh_num adjusted_historical_kwh_num success2) keep(1 3) nogen
rename challenge_order reduc

*Merge in info needed like total_kwh_num
cd "$datafrom"
merge m:1 id using unique_buildingchars_class,keepusing(floorarea value_tot bedrooms) keep(1 3) nogen

*-------------------------------------------------------------------------------
* Indicator for whether HH continues past the current challenge id or not
*-------------------------------------------------------------------------------
gen Continue=1 if num_challenges>reduc & reduc!=0
replace Continue=0 if Continue==.

* Use BCH success, mychange, and num_challenges to look at decision to continue to multiple challenges.
*Generate what BCH evaluates the % change is. This should match success2 as judged via the 0.095% threshold
gen BCHchange=(total_kwh_num-adjusted_historical_kwh_num)/adjusted_historical_kwh_num if reduc!=0
gen BCHevalSuccess=1 if BCHchange<-0.095
replace BCHevalSuccess=0 if BCHevalSuccess==.
tab BCHevalSuccess success2

bysort id reduc: gen uniid_reduc=1 if _n==1

*-------------------------------------------------------------------------------
*Calculate the pre-program use (2006) and SD away from it
gen year=year(dofm(date))
capture drop temp
bysort id: egen temp=mean(kwhw) if year==2006
bysort id: egen kwhw2006=mean(temp)
capture drop temp

bysort tps heating buildingtype: egen tpshtbt_ave2006_mean=mean(kwhw2006) if uniid_reduc==1 & reduc==1 & balancedset==1
bysort tps heating buildingtype: egen tpshtbt_ave2006_SD=sd(kwhw2006) if uniid_reduc==1 & reduc==1 & balancedset==1
gen ht_bt_from06mean_SD=(kwhw2006-tpshtbt_ave2006_mean)/tpshtbt_ave2006_SD

*Simplify dataset
keep if uniid_reduc==1 & (reduc<=2)

gen lnvalue_tot=ln(value_tot)
gen lnfloorarea=ln(floorarea)

gen bedrooms2=bedrooms
replace bedrooms2=5 if bedrooms>=5

*SELECTIONINTO C2OM PROBIT S.3.1.1
probit Continue i.heating i.buildingtype if uniid_reduc==1 & reduc==1 & balancedset==1 
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_3_1_1: margins, dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_3_1_1
estadd scalar chi2=`chi2', replace: Probit_S_3_1_1

*SELECTIONINTO C2OM PROBIT S.3.1.2
*eststo C2OM_S_3_1_2: reg Continue i.heating i.buildingtype bedrooms2 lnvalue_tot lnfloorarea if uniid_reduc==1 & reduc==1, robust
probit Continue i.heating i.buildingtype bedrooms2 lnvalue_tot lnfloorarea if uniid_reduc==1 & reduc==1 & balancedset==1
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_3_1_2: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_3_1_2
estadd scalar chi2=`chi2', replace: Probit_S_3_1_2

*SELECTIONINTO C2OM PROBIT S.3.2.1
*eststo C2OM_S_3_2_1: reg Continue i.heating i.buildingtype BCHchange if uniid_reduc==1 & reduc==1, robust
probit Continue i.heating i.buildingtype BCHchange if uniid_reduc==1 & reduc==1, robust
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_3_2_1: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_3_2_1
estadd scalar chi2=`chi2', replace: Probit_S_3_2_1
//effect of controlling for BCHchange

*SELECTIONINTO C2OM PROBIT S.3.2.2
*eststo C2OM_S_3_2_2: reg Continue i.heating i.buildingtype i.BCHevalSuccess if uniid_reduc==1 & reduc==1 & balancedset==1, robust
probit Continue i.heating i.buildingtype i.BCHevalSuccess if uniid_reduc==1 & reduc==1 & balancedset==1, robust
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_3_2_2: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_3_2_2
estadd scalar chi2=`chi2', replace: Probit_S_3_2_2


*SELECTIONINTO C2OM PROBIT S.3.2.3
probit Continue i.heating i.buildingtype BCHchange i.BCHevalSuccess if uniid_reduc==1 & reduc==1 & balancedset==1, robust
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_3_2_3: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_3_2_3
estadd scalar chi2=`chi2', replace: Probit_S_3_2_3
esttab Probit_S_3_2_3, compress nogaps star(* 0.10 ** 0.05 *** 0.01) se l mti

probit Continue ht_bt_from06mean_SD i.heating i.buildingtype if uniid_reduc==1 & reduc==1 & balancedset==1, robust
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_4_2_2: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_4_2_2
estadd scalar chi2=`chi2', replace: Probit_S_4_2_2

probit Continue ht_bt_from06mean_SD i.heating i.buildingtype i.BCHevalSuccess if uniid_reduc==1 & reduc==1 & balancedset==1, robust
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_4_2_3: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_4_2_3
estadd scalar chi2=`chi2', replace: Probit_S_4_2_3

*Estimates output
esttab Probit_S_3_1_1 Probit_S_3_1_2 Probit_S_3_2_1 Probit_S_3_2_3 Probit_S_4_2_2 Probit_S_4_2_3, compress nogaps star(* 0.10 ** 0.05 *** 0.01) se mti l scalar(N ar2 chi2) sfmt(%7.3f)

esttab Probit_S_3_1_1 Probit_S_3_1_2 Probit_S_3_2_1 Probit_S_3_2_3 Probit_S_4_2_2 Probit_S_4_2_3, compress nogaps star(* 0.10 ** 0.05 *** 0.01) se mti l scalar(N ar2 chi2) sfmt(%7.3f) drop (0.* 1.building*)
//dropping the reference groups (heating==0, 1STY SFD, winter

esttab Probit_S_3_1_1 Probit_S_3_1_2 Probit_S_3_2_1 Probit_S_3_2_3 Probit_S_4_2_2 Probit_S_4_2_3, compress nogaps star(* 0.10 ** 0.05 *** 0.01) se mti l scalar(N ar2 chi2) sfmt(%7.3f) drop (0.* 1.building*) tex
//tex output

*-------------------------------------------------------------------------------
*Probit model for selection into a third challenge
*-------------------------------------------------------------------------------
gen Continue2=1 if num_challenges>=3
replace Continue2=0 if Continue2==.

probit Continue2 i.heating i.buildingtype if uniid_reduc==1 & reduc==2 & balancedset==1 
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_3_1_1_2: margins, dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_3_1_1_2
estadd scalar chi2=`chi2', replace: Probit_S_3_1_1_2

probit Continue2 i.heating i.buildingtype bedrooms2 lnvalue_tot lnfloorarea if uniid_reduc==1 & reduc==2 & balancedset==1
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_3_1_2_2: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_3_1_2_2
estadd scalar chi2=`chi2', replace: Probit_S_3_1_2_2

probit Continue2 i.heating i.buildingtype BCHchange if uniid_reduc==1 & reduc==2, robust
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_3_2_1_2: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_3_2_1_2
estadd scalar chi2=`chi2', replace: Probit_S_3_2_1_2

probit Continue2 i.heating i.buildingtype BCHchange i.BCHevalSuccess if uniid_reduc==1 & reduc==2 & balancedset==1, robust
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_3_2_2_2: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_3_2_2_2
estadd scalar chi2=`chi2', replace: Probit_S_3_2_2_2

*Extend ht_bt_from06mean_SD to other reduc challenges
capture drop temp
gen temp=ht_bt_from06mean_SD
capture drop ht_bt_from06mean_SD
bysort id: egen ht_bt_from06mean_SD=mean(temp)

probit Continue2 ht_bt_from06mean_SD i.heating i.buildingtype if uniid_reduc==1 & reduc==2 & balancedset==1, robust
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_4_2_2_2: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_4_2_2_2
estadd scalar chi2=`chi2', replace: Probit_S_4_2_2_2

probit Continue2 ht_bt_from06mean_SD i.heating i.buildingtype i.BCHevalSuccess if uniid_reduc==1 & reduc==2 & balancedset==1, robust
local ar2=`e(r2_p)'
local chi2=`e(chi2)'
eststo Probit_S_4_2_3_2: margins ,dydx(*) atmeans grand post
estadd scalar ar2=`ar2', replace: Probit_S_4_2_3_2
estadd scalar chi2=`chi2', replace: Probit_S_4_2_3_2

*SELECTIONINTO C3OM PROBIT S.Nice Format
esttab Probit_S_3_1_1_2 Probit_S_3_1_2_2 Probit_S_3_2_1_2 Probit_S_3_2_2_2 Probit_S_4_2_2_2 Probit_S_4_2_3_2, compress nogaps star(* 0.10 ** 0.05 *** 0.01) se mti l scalar(N ar2 chi2) sfmt(%7.3f)
esttab Probit_S_3_1_1_2 Probit_S_3_1_2_2 Probit_S_3_2_1_2 Probit_S_3_2_2_2 Probit_S_4_2_2_2 Probit_S_4_2_3_2, compress nogaps star(* 0.10 ** 0.05 *** 0.01) se mti l scalar(N ar2 chi2) sfmt(%7.3f) drop (0.* 1.building*) tex

end

*===============================================================================
* Appendix Table A.3: Gaps between challenges
capture program drop ChallengeGaps
program define ChallengeGaps
*===============================================================================

disp "Calling Program Regression_DID_summarystats"
cd "$GeneralModified"
use "BalancedData",replace

keep if tps==1 & uniid==1

*-------------------------------------------------------------------------------
* Appendix Table A.3: gap_months histograms
*-------------------------------------------------------------------------------

*What is the distribution of months between the first challenge and second challenge?
*Want the length of gap between begin_challenges
gen reduc_months_2=begin_challenge_2-begin_challenge_1-12
gen reduc_months_3=begin_challenge_3-begin_challenge_2-12
gen reduc_months_4=begin_challenge_4-begin_challenge_3-12
gen reduc_months_5=begin_challenge_5-begin_challenge_4-12

twoway hist reduc_months_2 if uniid==1 & num_challenges>1 & balancedset_change==1 , ///
	discrete w(1) xtitle("") ytitle("") xlabel(0(12)80) ///
	title(1st-2nd Challenge) graphregion(color(white))
graph rename gap1,replace
su reduc_months_2 if uniid==1 & num_challenges>1 & balancedset_change==1,detail

twoway hist reduc_months_3 if uniid==1 & num_challenges>2 & balancedset_change==1, ///
	discrete w(1) xtitle("") ytitle("") xlabel(0(12)72) ///
	title(2nd-3rd Challenge) graphregion(color(white))
graph rename gap2,replace
su reduc_months_3 if uniid==1 & num_challenges>2 & balancedset_change==1,detail
//median is 2 months, mean is 6

twoway hist reduc_months_4 if uniid==1 & num_challenges>3 & balancedset_change==1, ///
	discrete w(1) xtitle("") ytitle("") xlabel(0(12)64) ///
	title(3rd-4th Challenge) graphregion(color(white))
graph rename gap3,replace
su reduc_months_4 if uniid==1 & num_challenges>3 & balancedset_change==1,detail
//median 2, mean is 4

twoway hist reduc_months_5 if uniid==1 & num_challenges>4 & balancedset_change==1, ///
	discrete w(1) xtitle("") ytitle("") xlabel(0(12)30) ///
	title(4th-5th Challenge) graphregion(color(white))
graph rename gap4,replace
su reduc_months_5 if uniid==1 & tps==1 & num_challenges>4 & balancedset_change==1,detail
//median 1, mean 3

*REGRESSION_DID_SUMMARYSTATS T.4
graph combine gap1 gap2 gap3 gap4,graphregion(color(white))

*-------------------------------------------------------------------------------
* * Appendix Table X: Histogram of start dates
*-------------------------------------------------------------------------------
*Histogram of program update: histogram by date of when a household starts a challenge
twoway hist begin_challenge_1 if begin_challenge_1>monthly("2000m1","YM") & uniid==1 & balancedset_change==1, ///
	discrete w(1) ttitle("") ytitle("") yscale(range(0(0.02)0.06)) ylabel(0 0.02 0.04 0.06) ///
	title(1st Challenge) tlabel("") tscale(range(2006m1 2016m6)) graphregion(color(white))
graph rename startdatehistogram_c1,replace
hist begin_challenge_2 if begin_challenge_2>monthly("2000m1","YM") & uniid==1 & balancedset_change==1, ///
	discrete w(1) ttitle("") ytitle("") yscale(range(0(0.02)0.06)) ylabel(,nolabels) ///
	title(2nd Challenge) tlabel("") tscale(range(2006m1 2016m6)) graphregion(color(white))
graph rename startdatehistogram_c2,replace
hist begin_challenge_3 if begin_challenge_3>monthly("2000m1","YM") & uniid==1 & balancedset_change==1, ///
	discrete w(1) ttitle("") ytitle("") yscale(range(0(0.02)0.08)) ylabel(0 0.02 0.04 0.06 0.08) ///
	title(3rd Challenge) tscale(range(2006m1 2016m6)) tlabel(2006m1 2008m1 2010m1 2012m1 2014m1 2016m1) graphregion(color(white))
graph rename startdatehistogram_c3,replace
hist begin_challenge_4 if begin_challenge_4>monthly("2000m1","YM") & uniid==1 & balancedset_change==1, ///
	discrete w(1) ttitle("") ytitle("") yscale(range(0(0.02)0.08)) ylabel(,nolabels) ///
	title(4th Challenge) tscale(range(2006m1 2016m6)) tlabel(2006m1 2008m1 2010m1 2012m1 2014m1 2016m1) graphregion(color(white))
graph rename startdatehistogram_c4,replace

graph combine startdatehistogram_c1 startdatehistogram_c2 startdatehistogram_c3 startdatehistogram_c4,imargin(small) graphregion(color(white))
graph rename HISTS1,replace

end


*===============================================================================
*Table weather adjustment discrpencies
capture program drop Weather
program define Weather
*===============================================================================
disp "Calling Program Weather"
cd "$GeneralModified"
use "BalancedData",replace

*Simplify dataset
keep if tps==1
drop t_t_m*
forvalues i=0/10 {
	capture drop t_a`i'_*
	capture drop t_b`i'_*

}
*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
*Calculate the total kwhw int eh 12 months before and during a challenge. 
*These totals are used to find the percent change in use over a first challenge. 
*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

*Generate numobs variables
*numobs: by prepostyear
gen prepostyears=0 if t_a0==1
replace prepostyears=-3 if t_b3==1
replace prepostyears=-2 if t_b2==1
replace prepostyears=-1 if t_b1==1
replace prepostyears=1 if t_a1==1
replace prepostyears=2 if t_a2==1

capture drop temp
bysort id prepostyears: egen temp=total(kwhw) if t_b1==1
bysort id: egen tot_prepost_b1=mean(temp)

capture drop temp
bysort id prepostyears: egen temp=total(kwhw) if t_a0==1
bysort id: egen tot_prepost_a0=mean(temp)

*-------------------------------------------------------------------------------
*Merge in hdd data
*-------------------------------------------------------------------------------
cd "$datafrom"
rename date year_month
merge m:1 year_month using hdd_data,keep(3) nogen
rename year_month date 

*-------------------------------------------------------------------------------
*Merge in BCH reported changes in target_kwh_num total_kwh_num from Accounts_All
*-------------------------------------------------------------------------------
rename reduc challenge_order
cd "$GeneralModified"
merge m:1 id challenge_order using Accounts_All.dta,keepusing(target_kwh_num total_kwh_num adjusted_historical_kwh_num)
rename challenge_order reduc

//_merge==1: should be all reduc==0 accounts as these have no challegne record
tab reduc if _merge==1
//_merge==2: should be all tps==0 accounts
tab _merge if tps==.,miss
//_merge==3; the remaining
keep if _merge!=2
drop _merge

*-------------------------------------------------------------------------------
*Create changes in kWh adjusted for weather
*-------------------------------------------------------------------------------
*generate total of HDD
capture drop temp
bysort id prepostyears: egen temp=total(hdd) if t_b1==1
bysort id: egen tot_hdd_b1=mean(temp)

capture drop temp
bysort id prepostyears: egen temp=total(hdd) if t_a0==1
bysort id: egen tot_hdd_a0=mean(temp)
capture drop temp

*Generate pc change in hdd over the first challenge
gen hdd_pc=(tot_hdd_a0-tot_hdd_b1)/tot_hdd_b1
//pc change in hdd days from b1 to a0.

*Generate observed changes in electricity use
gen mychange=(tot_prepost_a0-tot_prepost_b1)/tot_prepost_b1
//percent change in billed kwhw that I measure

gen BCHchange=(total_kwh_num-adjusted_historical_kwh_num)/adjusted_historical_kwh_num if reduc!=0
//BCH reported change in credited electricity use, based on their reported totals. This is what is reported, to customers.

*mytaret is the weather adjusted target with the correct weather adjustment applied.
*for non-electric heat households, the target is the previous years use
gen mytarget=tot_prepost_b1 if inlist(heating,0,2) & reduc==1
*for electric heat households, the target is adjused for the percent change in HDD applied to 46% of electricity use that BCH estimatescomes from electric heating.
*if hdd_pc is positive, this is an increase in hdd from b1 (pre program) to a1 (first challenge year)
*if hdd_pc is positive, this implies an increase in the demand for electric heating. As a result, the target level should be increased so that reductions are 'easier' to achieve relative to the adjusted target, compared to their previous years billed use.
*hdd data was verified separately. 
replace mytarget=tot_prepost_b1*(1+hdd_pc*0.46) if inlist(heating,1) & reduc==1
//Key weather adjustment formula 

gen mychange_adj=(tot_prepost_a0-mytarget)/mytarget
*mychange_adj is the credited change in kWh adjusted correctly for weather

*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*What if I use the opposite sign in the weather correction, and apply this equally to electric and non-electric heating households?
*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
gen mytarget_adj_neg=tot_prepost_b1*(1-hdd_pc*0.46) if reduc==1
*What happens: If the weather is colder, then ppl use more electricity. HDD change will rise. If it's colder, then it's harder to reduce ones electricity use and the 10% reduction target should be made easier.
*An easier target means that mytarget should increase (target electricity use has risen)
*BUT, based on the above formula, if HDD_pc increases then (1-hdd_pc) gets smaller, and the target will decrease...
gen mychange_adj_neg=(tot_prepost_a0-mytarget_adj_neg)/mytarget_adj_neg

*-------------------------------------------------------------------------------
*Compare success defined by mychange, BCCHange, mychange_adj, and mychange_adj_neg to BC Hydro measurement of success
*-------------------------------------------------------------------------------
bysort id reduc: gen uniid_reduc=1 if _n==1

*BCH reported credited changes
gen BCHevalSuccess=1 if BCHchange<-0.095
replace BCHevalSuccess=0 if BCHevalSuccess==.

*Bill changes
gen mychangeSuccess=1 if mychange<-0.095
replace mychangeSuccess=0 if mychangeSuccess==.

*Credited changes adjusted correctly for weather
gen mychangeSuccess_adj=1 if reduc==1 & mychange_adj<-0.095
replace mychangeSuccess_adj=0 if mychangeSuccess_adj==.

*Recreation of weather correction with opposite sign
gen mychangeSuccess_adj_neg=1 if reduc==1 & mychange_adj_neg<-0.095
replace mychangeSuccess_adj_neg=0 if mychangeSuccess_adj_neg==.


tab BCHevalSuccess mychangeSuccess if reduc==1 & uniid_reduc==1,miss
// close overlap between BCH measure of success and billed changes

tab BCHevalSuccess mychangeSuccess_adj if reduc==1 & uniid_reduc==1,miss
//this 'correct' weather adjustment has worse overlap with BCH measure of success than the non-adjusted

tab BCHevalSuccess mychangeSuccess_adj_neg if reduc==1 & uniid_reduc==1,miss
//unclear if this is an improvement or not in matching BCH measure of success

//suspect that weather correction was removed circa 2015. So look at the errors for only earlier challenges

tab BCHevalSuccess mychangeSuccess if reduc==1 & uniid_reduc==1 & begin_challenge_1<monthly("2013m6","YM"),miss 
//526 errors
tab BCHevalSuccess mychangeSuccess_adj if reduc==1 & uniid_reduc==1 & begin_challenge_1<monthly("2013m6","YM"),miss
//646 errors
tab BCHevalSuccess mychangeSuccess_adj_neg if reduc==1 & uniid_reduc==1 & begin_challenge_1<monthly("2013m6","YM"),miss 
//314 errors

*Introducing the negative sign error removes some, but not all, of the discrepency. 

*-------------------------------------------------------------------------------
*Histogram of weather adjustment differences
*-------------------------------------------------------------------------------
gen BCH_my_diff=mychange-BCHchange if reduc==1
gen BCH_my_diff_adj=mychange_adj-BCHchange if reduc==1

tw hist BCH_my_diff if reduc==1 & uniid_reduc==1 & BCH_my_diff<0.25 & BCH_my_diff>-0.25 & balancedset_change==1,w(0.005) fc(gs12) color(gs6) xscale(range(-0.25 0.25)) ///
	xlabel(-0.2(0.1)0.2 -0.2 "-20%" -0.1 "-10%" 0 "0%" 0.1 "10%" 0.2 "20%") ///
	graphregion(color(white)) ytitle(Density of Initial Challenge Reductions) xtitle(Absolute Difference) yscale(range(0 20)) title("Difference between" "Credited and Billed Changes")
graph rename POSTCHALLENGE_P14_3_0_5_1,replace

tw hist BCH_my_diff_adj if reduc==1 & uniid_reduc==1 & BCH_my_diff_adj<0.25 & BCH_my_diff_adj>-0.25 & balancedset_change==1,w(0.005) fc(gs12) color(gs6) xscale(range(-0.25 0.25)) ///
	xlabel(-0.2(0.1)0.2 -0.2 "-20%" -0.1 "-10%" 0 "0%" 0.1 "10%" 0.2 "20%") ///
	graphregion(color(white)) ytitle(Density) xtitle(Absolute Difference) yscale(range(0 20)) ytick(#4,grid) ylabel("") ///
	title("Difference between" "Credited and Updated Changes")
graph rename POSTCHALLENGE_P14_3_0_5_2,replace
//updated captions
*POSTCHALLENGE_P14_3_0_7
graph combine POSTCHALLENGE_P14_3_0_5_1 POSTCHALLENGE_P14_3_0_5_2,graphregion(color(white))
graph rename POSTCHALLENGE_P14_3_0_7,replace

end

*========================================================================
*Call the main program to run this file.
*========================================================================
main
